In [1]:
## Import the libraries used to compute the metrics needed
## to identify the star restaurants and generate recommendations.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt,seaborn as sns
%matplotlib inline
import warnings
In [2]:
# Load the restaurant dataset from the Excel workbook.
resturant_data = pd.read_excel('data.xlsx')
In [3]:
resturant_data.head()
Out[3]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines Average Cost for two Currency Has Table booking Has Online delivery Price range Aggregate rating Rating color Rating text Votes
0 7402935 Skye 94 Jakarta Menara BCA, Lantai 56, Jl. MH. Thamrin, Thamri... Grand Indonesia Mall, Thamrin Grand Indonesia Mall, Thamrin, Jakarta 106.821999 -6.196778 Italian, Continental 800000 Indonesian Rupiah(IDR) No No 3 4.1 Green Very Good 1498
1 7410290 Satoo - Hotel Shangri-La 94 Jakarta Hotel Shangri-La, Jl. Jend. Sudirman Hotel Shangri-La, Sudirman Hotel Shangri-La, Sudirman, Jakarta 106.818961 -6.203292 Asian, Indonesian, Western 800000 Indonesian Rupiah(IDR) No No 3 4.6 Dark Green Excellent 873
2 7420899 Sushi Masa 94 Jakarta Jl. Tuna Raya No. 5, Penjaringan Penjaringan Penjaringan, Jakarta 106.800144 -6.101298 Sushi, Japanese 500000 Indonesian Rupiah(IDR) No No 3 4.9 Dark Green Excellent 605
3 7421967 3 Wise Monkeys 94 Jakarta Jl. Suryo No. 26, Senopati, Jakarta Senopati Senopati, Jakarta 106.813400 -6.235241 Japanese 450000 Indonesian Rupiah(IDR) No No 3 4.2 Green Very Good 395
4 7422489 Avec Moi Restaurant and Bar 94 Jakarta Gedung PIC, Jl. Teluk Betung 43, Thamrin, Jakarta Thamrin Thamrin, Jakarta 106.821023 -6.196270 French, Western 350000 Indonesian Rupiah(IDR) No No 3 4.3 Green Very Good 243
In [4]:
# Load the lookup table that maps numeric country codes to country names.
country_code = pd.read_excel('country-code.xlsx')
country_code.head()
Out[4]:
Country Code Country
0 1 India
1 14 Australia
2 30 Brazil
3 37 Canada
4 94 Indonesia
In [5]:
# Attach the country name to every restaurant row. The left join keeps all
# restaurants, even if a code had no match in the lookup table.
working_data = pd.merge(
    resturant_data,
    country_code,
    how='left',
    on='Country Code',
)
working_data.head()
Out[5]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines Average Cost for two Currency Has Table booking Has Online delivery Price range Aggregate rating Rating color Rating text Votes Country
0 7402935 Skye 94 Jakarta Menara BCA, Lantai 56, Jl. MH. Thamrin, Thamri... Grand Indonesia Mall, Thamrin Grand Indonesia Mall, Thamrin, Jakarta 106.821999 -6.196778 Italian, Continental 800000 Indonesian Rupiah(IDR) No No 3 4.1 Green Very Good 1498 Indonesia
1 7410290 Satoo - Hotel Shangri-La 94 Jakarta Hotel Shangri-La, Jl. Jend. Sudirman Hotel Shangri-La, Sudirman Hotel Shangri-La, Sudirman, Jakarta 106.818961 -6.203292 Asian, Indonesian, Western 800000 Indonesian Rupiah(IDR) No No 3 4.6 Dark Green Excellent 873 Indonesia
2 7420899 Sushi Masa 94 Jakarta Jl. Tuna Raya No. 5, Penjaringan Penjaringan Penjaringan, Jakarta 106.800144 -6.101298 Sushi, Japanese 500000 Indonesian Rupiah(IDR) No No 3 4.9 Dark Green Excellent 605 Indonesia
3 7421967 3 Wise Monkeys 94 Jakarta Jl. Suryo No. 26, Senopati, Jakarta Senopati Senopati, Jakarta 106.813400 -6.235241 Japanese 450000 Indonesian Rupiah(IDR) No No 3 4.2 Green Very Good 395 Indonesia
4 7422489 Avec Moi Restaurant and Bar 94 Jakarta Gedung PIC, Jl. Teluk Betung 43, Thamrin, Jakarta Thamrin Thamrin, Jakarta 106.821023 -6.196270 French, Western 350000 Indonesian Rupiah(IDR) No No 3 4.3 Green Very Good 243 Indonesia
In [6]:
working_data.shape ## (rows, columns) of the merged DataFrame
Out[6]:
(9551, 20)
In [7]:
working_data.dtypes
Out[7]:
Restaurant ID             int64
Restaurant Name          object
Country Code              int64
City                     object
Address                  object
Locality                 object
Locality Verbose         object
Longitude               float64
Latitude                float64
Cuisines                 object
Average Cost for two      int64
Currency                 object
Has Table booking        object
Has Online delivery      object
Price range               int64
Aggregate rating        float64
Rating color             object
Rating text              object
Votes                     int64
Country                  object
dtype: object

All columns are observed to have the correct data types¶

In [8]:
working_data.isnull().sum()
Out[8]:
Restaurant ID           0
Restaurant Name         1
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
Country                 0
dtype: int64
In [9]:
# Discard every row that has at least one missing value, then print the
# per-column null counts again to confirm nothing is left.
working_data = working_data.dropna(axis=0, how='any')
print(working_data.isnull().sum())
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                0
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
Country                 0
dtype: int64

We found 1 null value in restaurant name and 9 null values in cuisines; the rows containing them were dropped¶

In [10]:
working_data.duplicated()
Out[10]:
0       False
1       False
2       False
3       False
4       False
        ...  
9546    False
9547    False
9548    False
9549    False
9550    False
Length: 9541, dtype: bool
In [11]:
working_data.duplicated().sum()  ## total number of duplicated rows (whole-row duplicates, not a per-column count)
Out[11]:
0

We see that there are no duplicate values in data¶

In [12]:
# Shorter snake_case names for the columns used in the analysis.
# Only column labels change; columns not listed keep their original names.
column_renames = {
    'Restaurant ID': 'resturant_id',
    'Restaurant Name': 'restaurant_name',
    'City': 'city',
    'Country Code': 'country_code',
    'Average Cost for two': 'average_cost2',
    'Has Table booking': 'table_booking',
    'Has Online delivery': 'deliver_online',
    'Price range': 'price_range',
    'Aggregate rating': 'agg_rating',
    'Rating text': 'rating_text',
    'Votes': 'votes',
    'Country': 'country',
}
working_data1 = working_data.rename(columns=column_renames)
In [13]:
working_data1.columns      ## checking the modification of columns
Out[13]:
Index(['resturant_id', 'restaurant_name', 'country_code', 'city', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'average_cost2', 'Currency', 'table_booking', 'deliver_online',
       'price_range', 'agg_rating', 'Rating color', 'rating_text', 'votes',
       'country'],
      dtype='object')
In [14]:
working_data1.city.value_counts()    # finding the number of restaurants city wise
Out[14]:
New Delhi         5473
Gurgaon           1118
Noida             1080
Faridabad          251
Ghaziabad           25
                  ... 
Consort              1
Lincoln              1
Monroe               1
Potrero              1
Lakes Entrance       1
Name: city, Length: 140, dtype: int64
In [15]:
# Bar chart of restaurant counts per city, restricted to cities with more
# than 20 restaurants so the x axis stays readable.
value_counts = working_data1['city'].value_counts()
filtered_value = value_counts[value_counts > 20]

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(filtered_value.index, filtered_value.values)
ax.set_xlabel('city')
ax.set_ylabel('Num of restaurants')
ax.set_title('city wise distribution of restaurants')
plt.show()

We can see that "New Delhi" is the city with maximum number of restaurants having total number of 5473 Restaurants¶

Many cities have only 1 restaurant, e.g. "Miller", "Weirton", "Potrero", "Monroe"¶

In [16]:
working_data1.country.value_counts()   # finding the number of resturant by country
Out[16]:
India             8651
United States      425
United Kingdom      80
South Africa        60
UAE                 60
Brazil              60
New Zealand         40
Turkey              34
Australia           24
Phillipines         22
Indonesia           21
Sri Lanka           20
Qatar               20
Singapore           20
Canada               4
Name: country, dtype: int64
In [17]:
# Bar chart of restaurant counts for every country in the dataset.
value_counts = working_data1['country'].value_counts()

fig, ax = plt.subplots(figsize=(20, 8))
ax.bar(value_counts.index, value_counts.values)
ax.set_xlabel('country')
ax.set_ylabel('Num of restaurants')
ax.set_title('country wise distribution of restaurants')
plt.xticks(rotation=90)  # vertical labels so long country names do not overlap
plt.show()

We observe that India has maximum number of restaurants while Canada has least.¶

In [18]:
working_data1.restaurant_name.value_counts() # finding the count for franchise
Out[18]:
Cafe Coffee Day         83
Domino's Pizza          79
Subway                  63
Green Chick Chop        51
McDonald's              48
                        ..
Mudrika Food Factory     1
Kavita's Restaurant      1
Sam and Scrooge          1
Saheb's Restaurant       1
VNS Live Studio          1
Name: restaurant_name, Length: 7436, dtype: int64
In [19]:
# Bar chart of restaurant names that occur more than 20 times,
# i.e. the chains with the most outlets in the dataset.
value_counts = working_data1['restaurant_name'].value_counts()
filtered_value = value_counts[value_counts > 20]

fig, ax = plt.subplots(figsize=(20, 10))
ax.bar(filtered_value.index, filtered_value.values)
ax.set_xlabel('restaurant name')
ax.set_ylabel('Num of franchise')
ax.set_title('Restaurants with more than 20 fanchise')
plt.xticks(rotation=90)
plt.show()

It is observed that Cafe Coffee Day has the most branches (83), with Domino's Pizza a close second at 79 branches¶

In [20]:
# Count restaurants by whether they accept table bookings ('Yes' / 'No').
has_booking = working_data1['table_booking'].value_counts()
has_booking
Out[20]:
No     8383
Yes    1158
Name: table_booking, dtype: int64
In [21]:
# Ratio of restaurants that accept table bookings to those that do not.
# The counts are looked up by label: integer keys on a string-indexed Series
# rely on pandas' deprecated positional fallback and on value_counts() order.
booking_ratio = has_booking['Yes'] / has_booking['No']
print('Ratio of restaurants haning bookings', booking_ratio)
Ratio of restaurants haning bookings 0.13813670523678873
In [22]:
# Bar chart of restaurants without vs. with table booking.
# Select the counts by label so each bar sits under the matching caption,
# regardless of how value_counts() happened to order the result.
booking_counts = has_booking[['No', 'Yes']]
plt.bar(['No table booking', 'Table Booking'], booking_counts)
plt.xlabel('Table Booking')
plt.ylabel('Number of Restaurants')
plt.title('Chart for restaurants having table bookings vs not having')
plt.show()

It is observed that only 1158 restaurants offer table booking, a ratio of about 0.14 to the 8383 that do not (roughly 12% of all restaurants)¶

In [23]:
# Count restaurants by whether they offer online delivery ('Yes' / 'No').
delivery = working_data1['deliver_online'].value_counts()
In [24]:
# Share of restaurants that offer online delivery, as a percentage.
delivery = working_data1.deliver_online.value_counts()
# Label lookup instead of delivery[1]: integer keys on a string-indexed Series
# use the deprecated positional fallback and depend on value_counts() order.
delivery_avilable = delivery['Yes']
Num_of_restaurants = working_data1.resturant_id.count()  # non-null restaurant ids
print('Number of restaurants having online delivery', delivery_avilable)
print('Total Number of restaurants', Num_of_restaurants)
print('percentage of restaurants having online delivery', (delivery_avilable / Num_of_restaurants) * 100)
Number of restaurants having online delivery 2451
Total Number of restaurants 9541
percentage of restaurants having online delivery 25.68913111833141
In [25]:
# Bar chart of restaurants without vs. with online delivery.
delivery = working_data1.deliver_online.value_counts()
# Select the counts by label so each bar sits under the matching caption,
# regardless of how value_counts() happened to order the result.
delivery_counts = delivery[['No', 'Yes']]
plt.bar(['No Online delivery', 'Online Delivery'], delivery_counts)
plt.xlabel('Restaurants having online delivery vs those that do not')
plt.ylabel('Number of Restaurants')
plt.title('Graph for online delivery of restaurants')
plt.show()

It is observed that only about 25.7% of restaurants (2451 of 9541) have online delivery¶

In [26]:
# Total votes for restaurants with and without online delivery, and the gap
# between the two totals.
offers_delivery = working_data1['deliver_online'] == 'Yes'
lacks_delivery = working_data1['deliver_online'] == 'No'

deliver_votes = working_data1.loc[offers_delivery, 'votes'].sum()
print('online delivery total votes', deliver_votes)

no_deliver_votes = working_data1.loc[lacks_delivery, 'votes'].sum()
print('no delivery total votes', no_deliver_votes)

# Stored as a one-element list, so it prints with surrounding brackets.
difference_votes = [no_deliver_votes - deliver_votes]
print('Difference in votes for delivery and non delivery', difference_votes)
online delivery total votes 517914
no delivery total votes 977236
Difference in votes for delivery and non delivery [459322]
In [27]:
# Split the restaurants by online-delivery flag and total the votes per group.
grouped_data = working_data1.groupby('deliver_online')
votes_with_delivery = grouped_data.get_group('Yes')['votes'].sum()
votes_without_delivery = grouped_data.get_group('No')['votes'].sum()

# Two-bar chart comparing the vote totals.
bar_labels = ['With Delivery', 'Without Delivery']
bar_heights = [votes_with_delivery, votes_without_delivery]
plt.bar(bar_labels, bar_heights)
plt.xlabel('Online Delivery')
plt.ylabel('Number of Votes')
plt.title('Number of Votes with and without Online Delivery')
plt.show()
In [28]:
# Frequency of every cuisine combination; show the ten most common.
top_cuisines = working_data1['Cuisines'].value_counts()
top_cuisines.head(10)
Out[28]:
North Indian                      936
North Indian, Chinese             511
Chinese                           354
Fast Food                         354
North Indian, Mughlai             334
Cafe                              299
Bakery                            218
North Indian, Mughlai, Chinese    197
Bakery, Desserts                  170
Street Food                       149
Name: Cuisines, dtype: int64
In [29]:
# Bar chart of the ten most frequent cuisine combinations.
# Reads working_data1 (the renamed frame every other analysis cell uses)
# rather than the pre-rename working_data, so the chart cannot drift from
# the table shown in the previous cell.
top_cuisines = working_data1.Cuisines.value_counts().head(10)
plt.bar(top_cuisines.index, top_cuisines.values)
plt.xlabel('Cuisines')
plt.ylabel('count')
plt.title('Top 10 cuisines with count')
plt.xticks(rotation=90)  # labels are long comma-separated lists
plt.show()
In [30]:
# The three most frequent cuisine combinations.
cuisines_count = working_data1['Cuisines'].value_counts().head(3)
cuisines_count
Out[30]:
North Indian             936
North Indian, Chinese    511
Chinese                  354
Name: Cuisines, dtype: int64

It is observed that North Indian cuisine is the most served cuisine across all restaurants¶

In [31]:
# For every city, report the cuisine combination listed by the most restaurants.
grouped_data = working_data1.groupby('city')
for city_name, city_rows in grouped_data:
    cuisine_counts = city_rows['Cuisines'].value_counts()
    print(f"Most served cuisine in {city_name}: {cuisine_counts.idxmax()}")
Most served cuisine in Abu Dhabi: Indian
Most served cuisine in Agra: North Indian, Mughlai
Most served cuisine in Ahmedabad: Continental, Chinese, North Indian
Most served cuisine in Albany: Japanese, Steak, Sushi
Most served cuisine in Allahabad: North Indian, Chinese
Most served cuisine in Amritsar: North Indian
Most served cuisine in Ankara: Kebab, Turkish Pizza
Most served cuisine in Armidale: Bar Food, Steak
Most served cuisine in Athens: American, Italian, Pizza
Most served cuisine in Auckland: European
Most served cuisine in Augusta: Mexican
Most served cuisine in Aurangabad: North Indian
Most served cuisine in Balingup: Modern Australian
Most served cuisine in Bandung: Cafe, Coffee and Tea, Western
Most served cuisine in Bangalore: Pizza, Cafe, Italian
Most served cuisine in Beechworth: Pizza, Bar Food
Most served cuisine in Bhopal: Fast Food
Most served cuisine in Bhubaneshwar: Chinese
Most served cuisine in Birmingham: Italian
Most served cuisine in Bogor: Peranakan, Indonesian
Most served cuisine in Boise: Pizza
Most served cuisine in BrasÌ_lia: Italian
Most served cuisine in Cape Town: Seafood, Japanese, Sushi
Most served cuisine in Cedar Rapids/Iowa City: American, Breakfast, Burger
Most served cuisine in Chandigarh: Italian
Most served cuisine in Chatham-Kent: Japanese, Sushi
Most served cuisine in Chennai: North Indian, Mughlai, Chinese, South Indian
Most served cuisine in Clatskanie: American, Breakfast, Desserts
Most served cuisine in Cochrane: Asian, Japanese
Most served cuisine in Coimbatore: Italian, North Indian, Desserts
Most served cuisine in Colombo: Seafood, Italian
Most served cuisine in Columbus: American
Most served cuisine in Consort: Chinese, Canadian
Most served cuisine in Dalton: American, BBQ, Southern
Most served cuisine in Davenport: Mexican
Most served cuisine in Dehradun: North Indian, Chinese
Most served cuisine in Des Moines: American, Italian
Most served cuisine in Dicky Beach: Coffee and Tea, Tea, Modern Australian
Most served cuisine in Doha: Indian
Most served cuisine in Dubai: Indian
Most served cuisine in Dubuque: Mexican
Most served cuisine in East Ballina: Cafe
Most served cuisine in Edinburgh: American
Most served cuisine in Faridabad: North Indian
Most served cuisine in Fernley: Mexican
Most served cuisine in Flaxton: Tea, Modern Australian
Most served cuisine in Forrest: Cafe, Australian
Most served cuisine in Gainesville: American, Southern
Most served cuisine in Ghaziabad: Cafe
Most served cuisine in Goa: Finger Food
Most served cuisine in Gurgaon: North Indian
Most served cuisine in Guwahati: Cafe
Most served cuisine in Hepburn Springs: Cafe, Coffee and Tea, Modern Australian
Most served cuisine in Huskisson: Breakfast, Modern Australian
Most served cuisine in Hyderabad: Mexican, American, Tex-Mex, Burger
Most served cuisine in Indore: North Indian, Chinese
Most served cuisine in Inner City: European, Contemporary
Most served cuisine in Inverloch: Burger, Coffee and Tea, Modern Australian
Most served cuisine in Jaipur: Chinese, Asian
Most served cuisine in Jakarta: Sunda, Indonesian
Most served cuisine in Johannesburg: Continental, South African, Beverages, Desserts, Seafood, Grill, Ice Cream, International
Most served cuisine in Kanpur: North Indian, Chinese
Most served cuisine in Kochi: Cafe, Continental, Italian
Most served cuisine in Kolkata: North Indian, Chinese
Most served cuisine in Lakes Entrance: Breakfast, Coffee and Tea
Most served cuisine in Lakeview: Burger, Desserts, Sandwich
Most served cuisine in Lincoln: Thai
Most served cuisine in London: American, Burger
Most served cuisine in Lorn: Breakfast, Coffee and Tea
Most served cuisine in Lucknow: Cafe, Fast Food
Most served cuisine in Ludhiana: North Indian, Chinese, Continental
Most served cuisine in Macedon: Cafe
Most served cuisine in Macon: Japanese, Sushi, Thai
Most served cuisine in Makati City: Japanese
Most served cuisine in Manchester: Italian
Most served cuisine in Mandaluyong City: Seafood, Asian, Filipino, Indian
Most served cuisine in Mangalore: Seafood
Most served cuisine in Mayfield: Asian
Most served cuisine in Mc Millan: Breakfast, Burger
Most served cuisine in Middleton Beach: Bar Food, Modern Australian
Most served cuisine in Mohali: Continental, North Indian
Most served cuisine in Monroe: Italian, Pizza
Most served cuisine in Montville: Coffee and Tea, Modern Australian
Most served cuisine in Mumbai: Cafe, Italian, Desserts, Fast Food, Chinese, Tea
Most served cuisine in Mysore: North Indian, Chinese, South Indian
Most served cuisine in Nagpur: Cafe
Most served cuisine in Nashik: North Indian
Most served cuisine in New Delhi: North Indian
Most served cuisine in Noida: North Indian
Most served cuisine in Ojo Caliente: American, International, Southwestern
Most served cuisine in Orlando: Brazilian, Steak
Most served cuisine in Palm Cove: Mediterranean, Seafood
Most served cuisine in Panchkula: North Indian, Middle Eastern, Pizza
Most served cuisine in Pasay City: European, Asian, Indian
Most served cuisine in Pasig City: Filipino
Most served cuisine in Patna: North Indian, Chinese
Most served cuisine in Paynesville: Modern Australian
Most served cuisine in Penola: Cafe, Coffee and Tea, Sandwich
Most served cuisine in Pensacola: Burger, Bar Food, Steak
Most served cuisine in Phillip Island: Breakfast, Coffee and Tea, Modern Australian
Most served cuisine in Pocatello: Mexican
Most served cuisine in Potrero: American, BBQ, Burger
Most served cuisine in Pretoria: French
Most served cuisine in Princeton: Seafood
Most served cuisine in Puducherry: Cafe
Most served cuisine in Pune: Continental, North Indian, Mughlai, Burmese
Most served cuisine in Quezon City: Filipino, Mexican
Most served cuisine in Ranchi: North Indian, South Indian, Chinese
Most served cuisine in Randburg: Cafe
Most served cuisine in Rest of Hawaii: Hawaiian, Seafood, Steak
Most served cuisine in Rio de Janeiro: Brazilian
Most served cuisine in San Juan City: Filipino
Most served cuisine in Sandton: Mexican
Most served cuisine in Santa Rosa: Italian, Pizza
Most served cuisine in Savannah: American, Breakfast
Most served cuisine in Secunderabad: North Indian, Chinese
Most served cuisine in Sharjah: American, Mexican
Most served cuisine in Singapore: French
Most served cuisine in Sioux City: American, Seafood, Steak
Most served cuisine in Surat: South Indian
Most served cuisine in SÌ£o Paulo: Brazilian
Most served cuisine in Tagaytay City: Filipino
Most served cuisine in Taguig City: Seafood, American, Mediterranean, Japanese
Most served cuisine in Tampa Bay: American, Desserts, Steak
Most served cuisine in Tangerang: Indonesian
Most served cuisine in Tanunda: Modern Australian, Australian
Most served cuisine in Trentham East: Australian
Most served cuisine in Vadodara: Cafe
Most served cuisine in Valdosta: Mexican
Most served cuisine in Varanasi: Chinese, North Indian
Most served cuisine in Vernonia: Coffee and Tea, Mediterranean
Most served cuisine in Victor Harbor: Coffee and Tea, Tapas, Australian
Most served cuisine in Vineland Station: Italian, Mediterranean, Pizza
Most served cuisine in Vizag: Fast Food
Most served cuisine in Waterloo: Mexican
Most served cuisine in Weirton: Burger, Greek, Sandwich
Most served cuisine in Wellington City: Cafe
Most served cuisine in Winchester Bay: Burger, Seafood, Steak
Most served cuisine in Yorkton: Asian
Most served cuisine in €¡stanbul: Cafe
In [32]:
## Writing a function to convert all currency into USD


# Units of local currency per 1 USD, keyed by the dataset's `Currency` labels.
# These are fixed, hard-coded rates. The table is built once at module level
# instead of on every call (the function is invoked once per row).
_USD_CONVERSION_RATES = {
    'Indonesian Rupiah(IDR)': 14390.50,
    'Indian Rupees(Rs.)': 74.13,
    'Botswana Pula(P)': 11.07,
    'Sri Lankan Rupee(LKR)': 200,
    'Rand(R)': 15.39,
    'Qatari Rial(QR)': 3.64,
    'Dollar($)': 1,
    'Emirati Diram(AED)': 3.67,
    'Brazilian Real(R$)': 5.20,
    'Turkish Lira(TL)': 13.16,
    'Pounds(£)': 0.72,
    'NewZealand($)': 1.42
}


def convert_to_usd(currency, average_cost):
    """Convert an amount in a local currency to US dollars.

    Parameters
    ----------
    currency : str
        Currency label exactly as it appears in the ``Currency`` column,
        e.g. ``'Indian Rupees(Rs.)'``.
    average_cost : int or float
        Amount expressed in ``currency``.

    Returns
    -------
    float or None
        ``average_cost`` divided by the rate for ``currency``, or ``None``
        when the label is not in the rate table.
    """
    conversion_rate = _USD_CONVERSION_RATES.get(currency)
    if conversion_rate is None:
        # Unknown currency label: signal "not convertible" rather than guessing.
        return None
    return average_cost / conversion_rate
    
    
In [33]:
def _row_cost_in_usd(row):
    """Return the USD value of one row's average cost for two."""
    return convert_to_usd(row['Currency'], row['average_cost2'])


# New column with each restaurant's average cost for two expressed in USD.
working_data1['converted_cost_usd'] = working_data1.apply(_row_cost_in_usd, axis=1)

working_data1['converted_cost_usd'].head()
Out[33]:
0    55.592231
1    55.592231
2    34.745144
3    31.270630
4    24.321601
Name: converted_cost_usd, dtype: float64
In [34]:
# Box plot of the USD-converted cost, followed by the mean price.
cost_axes = sns.boxplot(x='converted_cost_usd', data=working_data1)
cost_axes.set(title='Cost distribution across various restaurants')
average_price = working_data1['converted_cost_usd'].mean()  # mean price in USD
print()
print('Average Price across restaurants', average_price)
Average Price across restaurants 10.669980333842458

It is observed that most restaurants have a price below USD 50, with a mean of USD 10.67; there are some restaurants where charges are high, ranging from USD 250 to USD 500 and above¶

In [35]:
working_data1.columns  #getting names of columns once again for further analysis
Out[35]:
Index(['resturant_id', 'restaurant_name', 'country_code', 'city', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'average_cost2', 'Currency', 'table_booking', 'deliver_online',
       'price_range', 'agg_rating', 'Rating color', 'rating_text', 'votes',
       'country', 'converted_cost_usd'],
      dtype='object')

Now we will analyse how the various columns affect the aggregate rating. For this we segregate certain columns:¶

for example, latitude, longitude, locality, locality verbose and address can all be analysed as city vs ratings; likewise, currency is only the name of the currency for the values in average cost¶

In [36]:
# Box plot of the aggregate rating, followed by the mean rating.
rating_axes = sns.boxplot(x='agg_rating', data=working_data1)
rating_axes.set(title='Boxplot of ratings')
print()
average_rating = working_data1['agg_rating'].mean()
print('Average rating:', average_rating)
Average rating: 2.6650875170317634

It is observed that most ratings are between 2.5 to 3.5 with their mean at 2.66¶

In [37]:
# Finding relation between price and rating through graph
plot = sns.jointplot(x=working_data1.converted_cost_usd, y=working_data1.agg_rating, kind='scatter')
plot.set_axis_labels('Price (USD)', 'Rating')
plot.fig.suptitle("Rating vs Price", y=1.02)
Out[37]:
Text(0.5, 1.02, 'Rating vs Price')

It is observed that most of the points are clustered near 0 on the x axis, so we can say that rating does not increase with price¶

In [38]:
# Joint scatter plot of vote count against aggregate rating.
plot = sns.jointplot(
    data=working_data1,
    x='votes',
    y='agg_rating',
    kind='scatter',
)
plot.set_axis_labels('Votes', 'Rating')
plot.fig.suptitle("Rating vs votes", y=1.02)
Out[38]:
Text(0.5, 1.02, 'Rating vs votes')

From the plot its observed that rating does not increase when vote count increases¶

In [39]:
# Mean aggregate rating for restaurants with vs. without table booking
# (bar height is the group mean; error bars are switched off).
grid = sns.catplot(
    x='table_booking',
    y='agg_rating',
    data=working_data1,
    kind='bar',
    ci=None,
)
grid.set_axis_labels('Table Booking', 'Aggregate Rating')
plt.title('Rating Distribution by Table Booking')
plt.show()

From the bar graph its observed that restaurants which have table booking have higher ratings¶

In [40]:
# Box plot of aggregate rating for every cuisine combination.
# The figure is very tall because each distinct `Cuisines` value gets its own row.
plt.figure(figsize=[30, 650])
boxplot = sns.boxplot(y='Cuisines', x='agg_rating', data=working_data1, palette='Set2')
# The title now names the variable actually on the y axis; it previously read
# 'Rating vs City', copied from the city plot.
boxplot.set(title='Rating vs Cuisines')
boxplot.set_yticklabels(boxplot.get_yticklabels(), fontsize=20)

plt.show()

It is observed that restaurants which serve Mexican and Filipino food have the maximum mean rating, while restaurants which serve only North Indian or Chinese have the maximum variation in ratings¶

In [41]:
# Box plot of aggregate rating per city, one row per city.
plt.figure(figsize=(30, 100))
boxplot = sns.boxplot(
    x='agg_rating',
    y='city',
    data=working_data1,
    palette='Set2',
)
boxplot.set_title('Rating vs City')
city_tick_labels = boxplot.get_yticklabels()
boxplot.set_yticklabels(city_tick_labels, fontsize=20)

plt.show()

Its observed that Pasig city has the highest mean rating while new delhi has maximum variations in rating¶

In [42]:
# Mean aggregate rating for restaurants with vs. without online delivery.
grid = sns.catplot(
    x='deliver_online',
    y='agg_rating',
    data=working_data1,
    kind='bar',
    ci=None,
)
grid.set_axis_labels('Online Delivery', 'Aggregate Rating')
plt.title('Rating Distribution by Delivery')
plt.show()

It is observed that restaurants which have online delivery are better rated¶

In [43]:
## checking for best average ratings across top 100 restaurants average ratings
top_restaurants = working_data1.groupby('restaurant_name')['agg_rating'].mean().nlargest(100)
top_restaurants = top_restaurants.reset_index()

plt.figure(figsize=(50,100))
ax = sns.barplot(data=top_restaurants, x='agg_rating', y='restaurant_name', palette='Set2')
ax.set_title('Average Rating by Top Restaurants', fontsize=30)
ax.set_xlabel('Average Rating', fontsize=30)
ax.set_ylabel('Restaurant Name', fontsize=30)
ax.tick_params(axis='y', labelsize=40)  # Adjust font size of y-axis tick labels
plt.show()

It is observed that Atlanta Highway seafood market has the best average rating with Bao as close second¶

In [44]:
# Mean aggregate rating for each rating colour code.
grid = sns.catplot(
    x='Rating color',
    y='agg_rating',
    data=working_data1,
    kind='bar',
    ci=None,
)
grid.set_axis_labels('Rating Color', 'Aggregate Rating')
plt.title('Aggregate rating color distribution')
Out[44]:
Text(0.5, 1.0, 'Aggregate rating color distribution')

It is observed that dark green colour code has highest aggregate rating and white has least¶

In [45]:
# Mean aggregate rating for each rating text label.
grid = sns.catplot(
    x='rating_text',
    y='agg_rating',
    data=working_data1,
    kind='bar',
    ci=None,
)
grid.set_axis_labels('Rating Text', 'Aggregate Rating')
plt.title('Aggregate rating Text distribution')
plt.show()

It is observed that restaurants labelled "Excellent" have the highest mean aggregate rating¶

In [46]:
# Mean aggregate rating for each price range bucket.
grid = sns.catplot(
    x='price_range',
    y='agg_rating',
    data=working_data1,
    kind='bar',
    ci=None,
)
grid.set_axis_labels('Price Range', 'Aggregate Rating')
plt.title('Aggregate rating Price Range distribution')
plt.show()

It is observed that ratings are highest for price range 4¶

In [47]:
# Box plot of aggregate rating for each country code.
plt.figure(figsize=(30, 50))
boxplot = sns.boxplot(
    x='country_code',
    y='agg_rating',
    data=working_data1,
    palette='Set2',
)
boxplot.set_title('Rating vs Country Code', fontsize=30)

# One x-axis tick position per distinct country code.
distinct_codes = working_data1['country_code'].unique()
boxplot.set_xticks(range(len(distinct_codes)))
boxplot.set_xticklabels(boxplot.get_xticklabels(), fontsize=30)
boxplot.tick_params(axis='x', labelsize=30)  # larger x-axis tick labels
plt.show()

It is observed that country code 162 (Phillipines) has the highest mean aggregate rating, while country code 1 (India) has the lowest¶

In [ ]:
 
In [ ]: